# read in catch data
data <- readRDS("data_clean/seine_djfmp_ybfmp.rds")
summary(data)
## EventID Location RegionCode StationCode
## Length:430205 Length:430205 Min. :1.000 Length:430205
## Class :character Class :character 1st Qu.:1.000 Class :character
## Mode :character Mode :character Median :2.000 Mode :character
## Mean :2.812
## 3rd Qu.:4.000
## Max. :7.000
## NA's :15340
## Datetime SampleDate Month
## Min. :1995-01-04 09:40:00.00 Min. :1995-01-04 Min. : 1.000
## 1st Qu.:2002-01-16 12:57:00.00 1st Qu.:2002-01-16 1st Qu.: 4.000
## Median :2007-10-12 08:25:00.00 Median :2007-10-12 Median : 6.000
## Mean :2007-08-29 06:37:40.15 Mean :2007-08-28 Mean : 6.428
## 3rd Qu.:2013-05-06 10:00:00.00 3rd Qu.:2013-05-06 3rd Qu.: 9.000
## Max. :2019-12-31 12:13:00.00 Max. :2019-12-31 Max. :12.000
##
## Jday MethodCode GearConditionCode WeatherCode
## Min. : 1.0 Length:430205 Min. :1.00 Length:430205
## 1st Qu.: 99.0 Class :character 1st Qu.:1.00 Class :character
## Median :167.0 Mode :character Median :1.00 Mode :character
## Mean :180.1 Mean :1.04
## 3rd Qu.:273.0 3rd Qu.:1.00
## Max. :366.0 Max. :2.00
##
## DO WaterTemp Turbidity Secchi
## Min. : 1.02 Min. : 3.00 Min. : 0.14 Min. :0.0
## 1st Qu.: 7.70 1st Qu.:12.20 1st Qu.: 7.75 1st Qu.:0.2
## Median : 8.77 Median :16.67 Median : 14.40 Median :0.2
## Mean : 8.81 Mean :16.44 Mean : 25.04 Mean :0.2
## 3rd Qu.: 9.85 3rd Qu.:20.50 3rd Qu.: 29.60 3rd Qu.:0.3
## Max. :159.60 Max. :31.80 Max. :1000.00 Max. :1.2
## NA's :266407 NA's :6221 NA's :298836 NA's :416088
## SpecificConductance FlowDebris SiteDisturbance AlternateSite
## Min. : 0.58 Min. : NA Length:430205 Length:430205
## 1st Qu.: 120.90 1st Qu.: NA Class :character Class :character
## Median : 164.20 Median : NA Mode :character Mode :character
## Mean : 301.57 Mean :NaN
## 3rd Qu.: 345.00 3rd Qu.: NA
## Max. :34000.00 Max. : NA
## NA's :107003 NA's :430205
## Volume IEPFishCode ForkLength CountAdj
## Min. : 1.20 Length:430205 Min. : 0.00 Min. : 0.000
## 1st Qu.: 26.95 Class :character 1st Qu.: 0.00 1st Qu.: 0.000
## Median : 42.00 Mode :character Median : 0.00 Median : 0.000
## Mean : 47.99 Mean : 19.45 Mean : 2.047
## 3rd Qu.: 63.00 3rd Qu.: 33.00 3rd Qu.: 1.000
## Max. :945.00 Max. :800.00 Max. :901.200
## NA's :902
## GearCode Year Program
## Length:430205 Min. :1995 Length:430205
## Class :character 1st Qu.:2002 Class :character
## Mode :character Median :2007 Mode :character
## Mean :2007
## 3rd Qu.:2013
## Max. :2019
##
## Add region:
## DJFMP region codes:
## 1 = Sacramento River
## 2, 3, 4, 7 = Delta
## 6 = SF Bay (should not appear in the data at this point)
## 5 = San Joaquin
# Complete set of regions:
all_regions <- c("Sacramento","Yolo","Liberty_Island","Delta","San_Joaquin")
data <- data %>%
mutate(Region = case_when(Program == "DJFMP" & RegionCode == 2 &
grepl("^LI",StationCode) ~ "Liberty_Island",
RegionCode %in% c(1) ~ "Sacramento",
RegionCode %in% c(2,3,4,7) ~ "Delta",
RegionCode %in% c(5) ~ "San_Joaquin",
grepl("YBFMP", EventID) ~ "Yolo"))
## Carry out subsetting that is common to all species data sets:
data <- data %>%
filter(!(is.na(Volume))) %>% # Removing samples without volume data
select(Program, EventID, Location, RegionCode, Region, StationCode,
SampleDate, Datetime, Year, Month, Jday, GearConditionCode,
IEPFishCode, ForkLength, CountAdj, Volume, DO, WaterTemp,
Turbidity, Secchi, SpecificConductance) %>%
mutate(IEPFishCode = as.character(IEPFishCode))
# Retrieve site coordinates for mapping:
djfmp_site_file <- contentid::resolve("hash://sha256/f0f9e66da7415df90a7c0ea4c01938ecadd71ad412af1ccc940e861e63e96ba7")
djfmp_sites0 <- read_csv(djfmp_site_file)
## Rows: 108 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): MethodCode, Location, StationCode
## dbl (2): Latitude, Longitude
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ybfmp_sites_file <- contentid::resolve("hash://sha256/acc9940abf5662e81ee594553e7dc46a05c4cace9c924dbf5352c0544bc7a481")
ybfmp_sites0 <- read_csv(ybfmp_sites_file)
## Rows: 26 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): StationCode, StationName, StationNumber, PeriodOfRecordFrom, Period...
## dbl (2): Latitude, Longitude
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
any(duplicated(djfmp_sites0$StationCode, ybfmp_sites0$StationCode)) # should be FALSE
## [1] FALSE
all_site_coordinates <- rbind(djfmp_sites0[ ,c("StationCode","Latitude","Longitude")],
ybfmp_sites0[ ,c("StationCode","Latitude","Longitude")])
## Missing lengths have the value 0. Filter those out here.
## SACPIK:
get_SACPIK_age0_data <- function(dat) {
species_data <- dat %>%
filter(IEPFishCode == "SACPIK")
species_data_age0 <- species_data %>%
filter(Month %in% c(6, 7)) %>%
mutate(Age0_bool=(25 <= ForkLength & ForkLength <= 91)) %>%
group_by(Program, EventID, Location, RegionCode, Region, StationCode,
SampleDate, Datetime, Year, Month, Jday, GearConditionCode,
Volume, DO, WaterTemp, Turbidity, Secchi, SpecificConductance) %>%
summarise(TotalCountAdj_age0=sum(CountAdj[Age0_bool]),
.groups="drop") %>%
mutate(CPUE = TotalCountAdj_age0/Volume,
Volume_Z = scale(Volume),
Julian_Z = scale(Jday),
Julian_Sq_Z = scale(Jday^2))
## Double check that all fish are represented:
tmp <- species_data %>%
filter(Month %in% c(6, 7)) %>%
mutate(Age0=(25 <= ForkLength & ForkLength <= 91))
stopifnot(sum(species_data_age0$TotalCountAdj_age0) == sum(tmp$CountAdj[tmp$Age0]))
return(species_data_age0)
}
data_SACPIK_age0 <- get_SACPIK_age0_data(data)
## SPLITT:
get_SPLITT_age0_data <- function(dat) {
species_data <- data %>%
filter(IEPFishCode == "SPLITT")
species_data_age0 <- species_data %>%
filter(Month %in% c(5, 6)) %>%
mutate(Age0=(25 <= ForkLength & ForkLength <= 118)) %>%
group_by(Program, EventID, Location, RegionCode, Region, StationCode,
SampleDate, Datetime, Year, Month, Jday, GearConditionCode,
Volume, DO, WaterTemp, Turbidity, Secchi, SpecificConductance) %>%
summarise(TotalCountAdj_age0=sum(CountAdj[Age0]),
.groups="drop") %>%
mutate(CPUE = TotalCountAdj_age0/Volume,
Volume_Z = scale(Volume),
Julian_Z = scale(Jday),
Julian_Sq_Z = scale(Jday^2))
## Double check that all fish are represented:
tmp <- species_data %>%
filter(Month %in% c(5, 6)) %>%
mutate(Age0=(25 <= ForkLength & ForkLength <= 118))
stopifnot(sum(species_data_age0$TotalCountAdj_age0) == sum(tmp$CountAdj[tmp$Age0]))
return(species_data_age0)
}
data_SPLITT_age0 <- get_SPLITT_age0_data(data)
## SASCUC:
get_SACSUC_age0_data <- function(dat) {
species_data <- data %>%
filter(IEPFishCode == "SACSUC")
species_data_age0 <- species_data %>%
filter(Month %in% c(5, 6)) %>%
mutate(Age0=(25 <= ForkLength & ForkLength <= 86)) %>%
group_by(Program, EventID, Location, RegionCode, Region, StationCode,
SampleDate, Datetime, Year, Month, Jday, GearConditionCode,
Volume, DO, WaterTemp, Turbidity, Secchi, SpecificConductance) %>%
summarise(TotalCountAdj_age0=sum(CountAdj[Age0]),
.groups="drop") %>%
mutate(CPUE = TotalCountAdj_age0/Volume,
Volume_Z = scale(Volume),
Julian_Z = scale(Jday),
Julian_Sq_Z = scale(Jday^2))
## Double check that all fish are represented:
tmp <- species_data %>%
filter(Month %in% c(5, 6)) %>%
mutate(Age0=(25 <= ForkLength & ForkLength <= 86))
stopifnot(sum(species_data_age0$TotalCountAdj_age0) == sum(tmp$CountAdj[tmp$Age0]))
return(species_data_age0)
}
data_SACSUC_age0 <- get_SACSUC_age0_data(data)
## COMCAR:
get_COMCAR_age0_data <- function(dat) {
species_data <- data %>%
filter(IEPFishCode == "COMCAR")
species_data_age0 <- species_data %>%
filter(Month %in% c(5, 6)) %>%
mutate(Age0=(25 <= ForkLength & ForkLength <= 164)) %>%
group_by(Program, EventID, Location, RegionCode, Region, StationCode,
SampleDate, Datetime, Year, Month, Jday, GearConditionCode,
Volume, DO, WaterTemp, Turbidity, Secchi, SpecificConductance) %>%
summarise(TotalCountAdj_age0=sum(CountAdj[Age0]),
.groups="drop") %>%
mutate(CPUE = TotalCountAdj_age0/Volume,
Volume_Z = scale(Volume),
Julian_Z = scale(Jday),
Julian_Sq_Z = scale(Jday^2))
## Double check that all fish are represented:
tmp <- species_data %>%
filter(Month %in% c(5, 6)) %>%
mutate(Age0=(25 <= ForkLength & ForkLength <= 164))
stopifnot(sum(species_data_age0$TotalCountAdj_age0) == sum(tmp$CountAdj[tmp$Age0]))
return(species_data_age0)
}
data_COMCAR_age0 <- get_COMCAR_age0_data(data)
## REDSHI:
get_REDSHI_age0_data <- function(dat) {
species_data <- data %>%
filter(IEPFishCode == "REDSHI")
species_data_age0 <- species_data %>%
filter(Month %in% c(3, 4)) %>%
mutate(Age0=(25 <= ForkLength & ForkLength <= 49)) %>%
group_by(Program, EventID, Location, RegionCode, Region, StationCode,
SampleDate, Datetime, Year, Month, Jday, GearConditionCode,
Volume, DO, WaterTemp, Turbidity, Secchi, SpecificConductance) %>%
summarise(TotalCountAdj_age0=sum(CountAdj[Age0]),
.groups="drop") %>%
mutate(CPUE = TotalCountAdj_age0/Volume,
Volume_Z = scale(Volume),
Julian_Z = scale(Jday),
Julian_Sq_Z = scale(Jday^2))
## Double check that all fish are represented:
tmp <- species_data %>%
filter(Month %in% c(3, 4)) %>%
mutate(Age0=(25 <= ForkLength & ForkLength <= 49))
stopifnot(sum(species_data_age0$TotalCountAdj_age0) == sum(tmp$CountAdj[tmp$Age0]))
return(species_data_age0)
}
data_REDSHI_age0 <- get_REDSHI_age0_data(data)
## GOLDSHI:
get_GOLDSHI_age0_data <- function(dat) {
species_data <- data %>%
filter(IEPFishCode == "GOLDSHI")
species_data_age0 <- species_data %>%
filter(Month %in% c(6, 7)) %>%
mutate(Age0=(25 <= ForkLength & ForkLength <= 65)) %>%
group_by(Program, EventID, Location, RegionCode, Region, StationCode,
SampleDate, Datetime, Year, Month, Jday, GearConditionCode,
Volume, DO, WaterTemp, Turbidity, Secchi, SpecificConductance) %>%
summarise(TotalCountAdj_age0=sum(CountAdj[Age0]),
.groups="drop") %>%
mutate(CPUE = TotalCountAdj_age0/Volume,
Volume_Z = scale(Volume),
Julian_Z = scale(Jday),
Julian_Sq_Z = scale(Jday^2))
## Double check that all fish are represented:
tmp <- species_data %>%
filter(Month %in% c(6, 7)) %>%
mutate(Age0=(25 <= ForkLength & ForkLength <= 65))
stopifnot(sum(species_data_age0$TotalCountAdj_age0) == sum(tmp$CountAdj[tmp$Age0]))
return(species_data_age0)
}
data_GOLDSHI_age0 <- get_GOLDSHI_age0_data(data)
## Check region designations by mapping:
stopifnot(sum(is.na(data_SACPIK_age0$Region)) == 0)
stopifnot(sum(is.na(data_SPLITT_age0$Region)) == 0)
stopifnot(sum(is.na(data_SACSUC_age0$Region)) == 0)
stopifnot(sum(is.na(data_COMCAR_age0$Region)) == 0)
stopifnot(sum(is.na(data_REDSHI_age0$Region)) == 0)
stopifnot(sum(is.na(data_GOLDSHI_age0$Region)) == 0)
## Compile unique sites represented in the final data sets and check region assignments:
uniq_sites_in_data <- do.call("rbind", list(data_SACPIK_age0,
data_SPLITT_age0,
data_SACSUC_age0,
data_COMCAR_age0,
data_REDSHI_age0,
data_GOLDSHI_age0)) %>%
distinct(Program, Location, StationCode, Region) %>%
left_join(all_site_coordinates, by="StationCode") %>%
mutate(Region_f = factor(Region, levels=all_regions, ordered=TRUE))
pal <- leaflet::colorFactor(viridis::turbo(256), uniq_sites_in_data$Region_f)
uniq_sites_in_data %>%
leaflet::leaflet() %>%
leaflet::addTiles() %>%
leaflet::addCircleMarkers(
color = ~pal(Region_f),
stroke = FALSE,
fillOpacity = 1,
radius = 4,
lng = ~Longitude,
lat = ~Latitude,
label = ~StationCode) %>%
leaflet::addLegend(
title = "Region",
pal = pal,
values = ~Region_f,
position = "bottomright",
opacity = 1)
createEffortPlot <- function(dat) {
effort_summary <- dat %>%
mutate(#Year_f=factor(Year, levels=sort(unique(Year)), ordered=TRUE),
Month_f=factor(Month),
Region_f = factor(Region, levels=all_regions, ordered=TRUE)) %>%
group_by(Year, Month_f, Region_f, .drop=FALSE) %>%
summarise(NumOfUniqSites=length(unique(StationCode)),
NumOfSamples=dplyr::n(),
.groups="drop") %>%
mutate(#Year=paste0("'",substr(as.character(Year_f),3,4)),
Month_num=as.numeric(as.character(Month_f)),
Month=factor(Month_num, levels=sort(unique(Month_num)),
labels=month.name[sort(unique(Month_num))], ordered=TRUE))
ret <- ggplot(effort_summary) +
geom_col(aes(x=Year, y=NumOfSamples)) +
geom_text(aes(x=Year, y=140, label=NumOfUniqSites), size=2.5, col="blue") +
facet_grid(rows=vars(Region_f), cols=vars(Month)) +
scale_x_continuous(minor_breaks=seq(1995, 2019, by=1))
plot(ret)
}
p_effort_SACPIK <- createEffortPlot(data_SACPIK_age0) + ggtitle("Pikeminnow")
p_effort_SPLITT <- createEffortPlot(data_SPLITT_age0) + ggtitle("Splittail")
p_effort_SACSUC <- createEffortPlot(data_SACSUC_age0) + ggtitle("Sucker")
p_effort_COMCAR <- createEffortPlot(data_COMCAR_age0) + ggtitle("Carp")
p_effort_REDSHI <- createEffortPlot(data_REDSHI_age0) + ggtitle("Red shiner")
p_effort_GOLDSHI <- createEffortPlot(data_GOLDSHI_age0) + ggtitle("Golden shiner")
pdf("Figures/effort_figures.pdf", onefile=TRUE, width=10)
p_effort_SACPIK
p_effort_SPLITT
p_effort_SACSUC
p_effort_COMCAR
p_effort_REDSHI
p_effort_GOLDSHI
dev.off()
## png
## 2
calculateIndices <- function(dat) {
## Require that all selected months be present to have a non-missing index.
## Assume that all selected months are represented in dat and use tidyr::complete().
ret <- dat %>%
group_by(StationCode, Year, Month, Region) %>%
summarise(CPUE = mean(CPUE),
.groups="drop") %>%
group_by(Year, Month, Region) %>%
summarise(CPUE = mean(CPUE),
.groups="drop") %>%
tidyr::complete(Year, Month, Region) %>%
group_by(Year, Region) %>%
summarise(Index = mean(CPUE),
.groups="drop") %>%
mutate(log_Index=log(Index + 0.00001)) %>%
tidyr::pivot_wider(id_cols=Year,
names_from=Region,
values_from=c(Index, log_Index)) %>%
mutate(Index_Watershed=Index_Delta + Index_Sacramento +
Index_San_Joaquin + Index_Yolo + Index_Liberty_Island)
return(ret)
}
SACPIK_Index_Final <- calculateIndices(data_SACPIK_age0)
SPLITT_Index_Final <- calculateIndices(data_SPLITT_age0)
SACSUC_Index_Final <- calculateIndices(data_SACSUC_age0)
COMCAR_Index_Final <- calculateIndices(data_COMCAR_age0)
REDSHI_Index_Final <- calculateIndices(data_REDSHI_age0)
GOLDSHI_Index_Final <- calculateIndices(data_GOLDSHI_age0)
## Figures:
plotIndices <- function(index_dat) {
ret <- index_dat %>%
dplyr::select(-c(log_Index_Delta, log_Index_Liberty_Island, log_Index_Sacramento,
log_Index_San_Joaquin, log_Index_Yolo)) %>%
tidyr::pivot_longer(cols=-Year, names_to="Region", values_to="Index",
names_prefix="Index_") %>%
dplyr::mutate(Region_f=factor(Region, levels=c(all_regions,"Watershed"),
ordered=TRUE)) %>%
ggplot() +
geom_col(aes(x=Year, y=Index)) +
facet_grid(rows=vars(Region_f), scales="free_y") +
#scale_y_continuous(limits=c(0, NA), expand=c(0,0)) +
scale_x_continuous(breaks=seq(1995, 2019, by=2))
return(ret)
}
p_SACPIK <- plotIndices(SACPIK_Index_Final) + ggtitle("Pikeminnow")
p_SACPIK
p_SPLITT <- plotIndices(SPLITT_Index_Final) + ggtitle("Splittail")
p_SPLITT
p_SACSUC <- plotIndices(SACSUC_Index_Final) + ggtitle("Sucker")
p_SACSUC
p_COMCAR <- plotIndices(COMCAR_Index_Final) + ggtitle("Carp")
p_COMCAR
p_REDSHI <- plotIndices(REDSHI_Index_Final) + ggtitle("Red shiner")
p_REDSHI
p_GOLDSHI <- plotIndices(GOLDSHI_Index_Final) + ggtitle("Golden shiner")
p_GOLDSHI
pdf("Figures/index_figures.pdf", onefile=TRUE)
p_SACPIK
p_SPLITT
p_SACSUC
p_COMCAR
p_REDSHI
p_GOLDSHI
dev.off()
## png
## 2
Sacramento Pikeminnow: May 1 - June 30
Splittail: February 22 - June 8
Sacramento sucker: March 10 - May 23
Common Carp: April 30 - June 30
Red Shiner: May 9 - July 24
Golden Shiner: April 30 - June 30
environ_date_lookup <- data.frame(
species=c("SACPIK","SPLITT","SACSUC","COMCAR","REDSHI","GOLDSHI"),
min_day=c("05-01","02-22","03-10","04-30","05-09","04-30"),
max_day=c("06-30","06-08","05-23","06-30","07-24","06-30")
)
## Discharge data is taken DNR Day Flow data
# SJ = SJR
# Sac = SAC - not going to use YOLO because the sites are upstream of the bypass
# Delta = OUT
## Timing of flows:
## Centroid
# Looking at timing of flow by julian day in each region
# Formula taken from Sommer et al. 2011
# PF = sum(Julian day * flow)/sum(flow)
# Going to calculate timing of flow for Jan - May because peak spawning for all species should have started by May and high flows earlier may have allowed them to reach the spawning grounds
Discharge <- read.csv("data/dayflow_1994_2019.csv") %>%
filter(Year %in% 1995:2019) %>%
mutate(Date=as.Date(Date, "%d-%b-%y"),
Julian_Date=as.POSIXlt(Date)$yday + 1)
# Variables were created below anticipating reviewers thinking about flow earlier that Jan
# Made a variable for water year w/ Oct as month 1
# Then did a julian date with Oct 1 as 1
# Did case when due to leap years
# This needs to be checked before it is used
Discharge_WY <- Discharge %>%
mutate(Water_Year=ifelse(Mo >= 10, Year + 1, Year),
WaterYearStartDate=as.Date(paste0(Water_Year - 1,"-10-01")),
Julian_Date_Water_Year=as.numeric(Date - WaterYearStartDate) + 1)
calculateMeanFlowAndTiming <- function(discharge_dat, plot_opt=TRUE) {
species <- unique(discharge_dat$species)
spawning_window_min_day <- unique(discharge_dat$min_day)
spawning_window_max_day <- unique(discharge_dat$max_day)
## sw = "spawning window"
sw_flow_wide <- discharge_dat %>%
dplyr::mutate(ref_date_min=as.Date(paste(Year,min_day, sep="-")),
ref_date_max=as.Date(paste(Year,max_day, sep="-"))) %>%
dplyr::filter(ref_date_min <= Date & Date <= ref_date_max) %>%
dplyr::select(Julian_Date, Year, Date, ref_date_min, ref_date_max,
SJR, SAC, OUT, YOLO) %>%
dplyr::group_by(Year) %>%
dplyr::reframe(Julian_Date, Year, Date, ref_date_min, ref_date_max,
SJR, SAC, OUT, YOLO,
## Save here for optional plotting later:
scaled_SJ=SJR/sum(SJR),
scaled_Sac=SAC/sum(SAC),
scaled_Delta=OUT/sum(OUT),
scaled_Yolo=YOLO/sum(YOLO))
sw_flow_summary_wide <- sw_flow_wide %>%
dplyr::group_by(Year) %>%
dplyr::summarise(
MeanFlow_SJ = mean(SJR),
MeanFlow_Sac = mean(SAC),
MeanFlow_Delta = mean(OUT),
MeanFlow_Yolo = mean(YOLO),
## Time of flow within the selected spawning window:
FlowTiming_SJ = sum(Julian_Date * scaled_SJ),
FlowTiming_Sac = sum(Julian_Date * scaled_Sac),
FlowTiming_Delta = sum(Julian_Date * (scaled_Delta)),
FlowTiming_Yolo = sum(Julian_Date * (scaled_Yolo)),
.groups="drop")
## Optional plotting:
if(plot_opt) {
scaled_sw_flow_long <- sw_flow_wide %>%
dplyr::select(Year, Julian_Date, Date, ref_date_min, ref_date_max,
scaled_SJ, scaled_Sac, scaled_Delta, scaled_Yolo) %>%
tidyr::pivot_longer(cols=c(scaled_SJ, scaled_Sac, scaled_Delta, scaled_Yolo),
names_to="Region",
values_to="ScaledFlow",
names_prefix="scaled_")
flow_timing_long_tmp <- sw_flow_summary_wide %>%
dplyr::select(Year, FlowTiming_SJ, FlowTiming_Sac, FlowTiming_Delta,
FlowTiming_Yolo) %>%
tidyr::pivot_longer(cols=-Year,
names_to="Region",
values_to="FlowTimingJulianDay",
names_prefix="FlowTiming_")
tmp_interpolation_df <- scaled_sw_flow_long %>%
dplyr::left_join(flow_timing_long_tmp, by=c("Year","Region"))
tmp_interpolation_list <- split(tmp_interpolation_df, f= ~ Region + Year)
flow_timing_long <- do.call("rbind", lapply(tmp_interpolation_list, function(dat) {
dat$ScaledFlowInterpolated <- approx(x=dat$Julian_Date, y=dat$ScaledFlow,
xout=unique(dat$FlowTimingJulianDay),
method="linear")$y
ret <- dat %>%
dplyr::select(Year, Region, FlowTimingJulianDay, ScaledFlowInterpolated) %>%
dplyr::distinct()
return(ret)
}))
p <- ggplot(scaled_sw_flow_long) +
geom_line(aes(x=Julian_Date, y=ScaledFlow, col=Region)) +
facet_wrap( ~ Year, scales="free_y") +
scale_y_continuous(limits=c(0,NA)) +
geom_segment(data=flow_timing_long,
aes(x=FlowTimingJulianDay, xend=FlowTimingJulianDay,
y=0, yend=ScaledFlowInterpolated), col="black") +
geom_point(data=flow_timing_long,
aes(x=FlowTimingJulianDay, y=ScaledFlowInterpolated, fill=Region,
col=Region, pch=Region),
size=2) +
theme_bw() +
labs(x="Spawning window Julian date", y="Scaled flow: daily_flow/sum(daily_flow)",
title=paste0(species, "; using spawning window: ",spawning_window_min_day,
" through ", spawning_window_max_day)) +
scale_shape_manual(values=c(21,22,24,25))
print(p)
}
return(sw_flow_summary_wide)
}
SACPIK_Discharge <- Discharge %>%
dplyr::mutate(species="SACPIK") %>%
dplyr::left_join(environ_date_lookup, by="species") %>%
calculateMeanFlowAndTiming()
SPLITT_Discharge <- Discharge %>%
dplyr::mutate(species="SPLITT") %>%
dplyr::left_join(environ_date_lookup, by="species") %>%
calculateMeanFlowAndTiming()
SACSUC_Discharge <- Discharge %>%
dplyr::mutate(species="SACSUC") %>%
dplyr::left_join(environ_date_lookup, by="species") %>%
calculateMeanFlowAndTiming()
COMCAR_Discharge <- Discharge %>%
dplyr::mutate(species="COMCAR") %>%
dplyr::left_join(environ_date_lookup, by="species") %>%
calculateMeanFlowAndTiming()
REDSHI_Discharge <- Discharge %>%
dplyr::mutate(species="REDSHI") %>%
dplyr::left_join(environ_date_lookup, by="species") %>%
calculateMeanFlowAndTiming()
GOLDSHI_Discharge <- Discharge %>%
dplyr::mutate(species="GOLDSHI") %>%
dplyr::left_join(environ_date_lookup, by="species") %>%
calculateMeanFlowAndTiming()
# ## Timing of flow by year:
# Flow_Timing_Y <- Discharge %>%
# filter(Mo %in% c(1:5)) %>%
# group_by(Year) %>%
# summarise(FlowTiming_SJ = sum(Julian_Date * SJR)/sum(SJR),
# FlowTiming_Sac = sum(Julian_Date * SAC)/sum(SAC),
# FlowTiming_Delta = sum(Julian_Date * (OUT))/sum(OUT),
# FlowTiming_Yolo = sum(Julian_Date * (YOLO))/sum(YOLO))
#
# ## Timing of flow by water year:
# Flow_Timing_WY <- Discharge_WY %>%
# filter(Mo %in% c(10:12, 1:5)) %>%
# group_by(Water_Year) %>%
# summarise(FlowTimingWY_SJ = sum(Julian_Date_Water_Year * SJR)/sum(SJR),
# FlowTimingWY_Sac = sum(Julian_Date_Water_Year * SAC)/sum(SAC),
# FlowTimingWY_Delta = sum(Julian_Date_Water_Year * (OUT))/sum(OUT),
# FlowTiming_Yolo = sum(Julian_Date * (YOLO))/sum(YOLO))
#
# head(Flow_Timing_Y)
# head(Flow_Timing_WY)
calculateMeanWaterTemp <- function(survey_dat) {
## Similar to calculateIndices() ...
## Require that all selected months be present to have a non-missing index.
## Assume that all selected months are represented in dat and use tidyr::complete().
ret <- survey_dat %>%
dplyr::mutate(ref_date_min=as.Date(paste(Year,min_day, sep="-")),
ref_date_max=as.Date(paste(Year,max_day, sep="-"))) %>%
dplyr::filter(ref_date_min <= SampleDate & SampleDate <= ref_date_max) %>%
group_by(StationCode, Year, Region) %>%
summarise(Mean_WT = mean(WaterTemp, na.rm=TRUE),
.groups="drop") %>%
group_by(Year, Region) %>%
summarise(Mean_WT = mean(Mean_WT),
.groups="drop") %>%
tidyr::complete(Year, Region) %>%
tidyr::pivot_wider(id_cols=Year,
names_from=Region,
values_from=Mean_WT,
names_prefix="MeanWT_")
return(ret)
}
SACPIK_MeanWT <- data %>%
dplyr::mutate(species="SACPIK") %>%
dplyr::left_join(environ_date_lookup, by="species") %>%
calculateMeanWaterTemp()
SPLITT_MeanWT <- data %>%
dplyr::mutate(species="SPLITT") %>%
dplyr::left_join(environ_date_lookup, by="species") %>%
calculateMeanWaterTemp()
SACSUC_MeanWT <- data %>%
dplyr::mutate(species="SACSUC") %>%
dplyr::left_join(environ_date_lookup, by="species") %>%
calculateMeanWaterTemp()
COMCAR_MeanWT <- data %>%
dplyr::mutate(species="COMCAR") %>%
dplyr::left_join(environ_date_lookup, by="species") %>%
calculateMeanWaterTemp()
REDSHI_MeanWT <- data %>%
dplyr::mutate(species="REDSHI") %>%
dplyr::left_join(environ_date_lookup, by="species") %>%
calculateMeanWaterTemp()
GOLDSHI_MeanWT <- data %>%
dplyr::mutate(species="GOLDSHI") %>%
dplyr::left_join(environ_date_lookup, by="species") %>%
calculateMeanWaterTemp()
addStandardizedCovariates <- function(x) {
all_names <- names(x)
covar_names <- all_names[all_names != "Year"]
for(this_col in covar_names) {
x[ ,paste0(this_col,"_stand")] <- scale(x[ ,this_col])
}
return(x)
}
## SACPIK:
SACPIK_Covariates <- dplyr::full_join(SACPIK_Discharge, SACPIK_MeanWT, by="Year")
SACPIK_Covariates <- addStandardizedCovariates(SACPIK_Covariates)
SACPIK_final <- dplyr::left_join(SACPIK_Index_Final,
SACPIK_Covariates,
by="Year")
## SPLITT:
SPLITT_Covariates <- dplyr::full_join(SPLITT_Discharge, SPLITT_MeanWT, by="Year")
SPLITT_Covariates <- addStandardizedCovariates(SPLITT_Covariates)
SPLITT_final <- dplyr::left_join(SPLITT_Index_Final,
SPLITT_Covariates,
by="Year")
## SACSUC:
SACSUC_Covariates <- dplyr::full_join(SACSUC_Discharge, SACSUC_MeanWT, by="Year")
SACSUC_Covariates <- addStandardizedCovariates(SACSUC_Covariates)
SACSUC_final <- dplyr::left_join(SACSUC_Index_Final,
SACSUC_Covariates,
by="Year")
## COMCAR:
COMCAR_Covariates <- dplyr::full_join(COMCAR_Discharge, COMCAR_MeanWT, by="Year")
COMCAR_Covariates <- addStandardizedCovariates(COMCAR_Covariates)
COMCAR_final <- dplyr::left_join(COMCAR_Index_Final,
COMCAR_Covariates,
by="Year")
## REDSHI:
REDSHI_Covariates <- dplyr::full_join(REDSHI_Discharge, REDSHI_MeanWT, by="Year")
REDSHI_Covariates <- addStandardizedCovariates(REDSHI_Covariates)
REDSHI_final <- dplyr::left_join(REDSHI_Index_Final,
REDSHI_Covariates,
by="Year")
## GOLDSHI:
GOLDSHI_Covariates <- dplyr::full_join(GOLDSHI_Discharge, GOLDSHI_MeanWT, by="Year")
GOLDSHI_Covariates <- addStandardizedCovariates(GOLDSHI_Covariates)
GOLDSHI_final <- dplyr::left_join(GOLDSHI_Index_Final,
GOLDSHI_Covariates,
by="Year")
plot_corr <- function(dat, species, region) {
stopifnot(region %in% c("Delta","Liberty Island","Yolo","Sacramento","San Joaquin"))
if(region == "Delta") {
use_dat <- dat[ ,c("Index_Delta","MeanFlow_Delta_stand",
"FlowTiming_Delta_stand","MeanWT_Delta_stand")]
} else if(region == "Liberty Island") {
## Using Yolo flow for Liberty Island??
use_dat <- dat[ ,c("Index_Liberty_Island","MeanFlow_Yolo_stand",
"FlowTiming_Yolo_stand","MeanWT_Liberty_Island_stand")]
} else if(region == "Yolo") {
use_dat <- dat[ ,c("Index_Yolo","MeanFlow_Yolo_stand",
"FlowTiming_Yolo_stand","MeanWT_Yolo_stand")]
} else if(region == "Sacramento") {
use_dat <- dat[ ,c("Index_Sacramento","MeanFlow_Sac_stand",
"FlowTiming_Sac_stand","MeanWT_Sacramento_stand")]
} else if(region == "San Joaquin") {
use_dat <- dat[ ,c("Index_San_Joaquin","MeanFlow_SJ_stand",
"FlowTiming_SJ_stand","MeanWT_San_Joaquin_stand")]
}
pairs(use_dat,
lower.panel=function(x, y) {
text(x, y, paste0("'",substr(dat$Year, 3, 4)))
},
upper.panel=function(x, y) {
text(mean(range(x, na.rm=TRUE)), mean(range(y, na.rm=TRUE)),
round(cor(x, y, use="complete.obs"), digits=2), col="red", cex=2)
}, main=paste(species, region, sep=" - "))
}
plot_corr(SACPIK_final, species="Pikeminnow", region="Delta")
plot_corr(SPLITT_final, species="Splittail", region="Delta")
plot_corr(SACSUC_final, species="Sucker", region="Delta")
plot_corr(COMCAR_final, species="Carp", region="Delta")
plot_corr(REDSHI_final, species="Red shiner", region="Delta")
plot_corr(GOLDSHI_final, species="Golden shiner", region="Delta")
plot_corr(SACPIK_final, species="Pikeminnow", region="Liberty Island")
plot_corr(SPLITT_final, species="Splittail", region="Liberty Island")
plot_corr(SACSUC_final, species="Sucker", region="Liberty Island")
plot_corr(COMCAR_final, species="Carp", region="Liberty Island")
plot_corr(REDSHI_final, species="Red shiner", region="Liberty Island")
plot_corr(GOLDSHI_final, species="Golden shiner", region="Liberty Island")
plot_corr(SACPIK_final, species="Pikeminnow", region="Yolo")
plot_corr(SPLITT_final, species="Splittail", region="Yolo")
plot_corr(SACSUC_final, species="Sucker", region="Yolo")
plot_corr(COMCAR_final, species="Carp", region="Yolo")
plot_corr(REDSHI_final, species="Red shiner", region="Yolo")
plot_corr(GOLDSHI_final, species="Golden shiner", region="Yolo")
plot_corr(SACPIK_final, species="Pikeminnow", region="Sacramento")
plot_corr(SPLITT_final, species="Splittail", region="Sacramento")
plot_corr(SACSUC_final, species="Sucker", region="Sacramento")
plot_corr(COMCAR_final, species="Carp", region="Sacramento")
plot_corr(REDSHI_final, species="Red shiner", region="Sacramento")
plot_corr(GOLDSHI_final, species="Golden shiner", region="Sacramento")
plot_corr(SACPIK_final, species="Pikeminnow", region="San Joaquin")
plot_corr(SPLITT_final, species="Splittail", region="San Joaquin")
plot_corr(SACSUC_final, species="Sucker", region="San Joaquin")
plot_corr(COMCAR_final, species="Carp", region="San Joaquin")
plot_corr(REDSHI_final, species="Red shiner", region="San Joaquin")
plot_corr(GOLDSHI_final, species="Golden shiner", region="San Joaquin")
write.csv(SACPIK_final, "data_clean/SACPIK_index.csv", row.names=FALSE)
write.csv(SPLITT_final, "data_clean/SPLITT_index.csv", row.names=FALSE)
write.csv(SACSUC_final, "data_clean/SACSUC_index.csv", row.names=FALSE)
write.csv(COMCAR_final, "data_clean/COMCAR_index.csv", row.names=FALSE)
write.csv(REDSHI_final, "data_clean/REDSHI_index.csv", row.names=FALSE)
write.csv(GOLDSHI_final, "data_clean/GOLDSHI_index.csv", row.names=FALSE)
Calculated using the months used to make the indices
Formula taken from Sommer et al. 2011
CD = sum(RiverKilo * CPUE)/sum(CPUE)
Going to be used as response variable in model with flow/timing/temp as covariates (below)
Could make this code much shorter - group by Region and year and calculate? It seems now we are using the station name to determine region.
# SACSUC_RM <-
# data_SACSUC_age0 %>%
# left_join(Site_RM,
# by = "StationCode") %>%
# filter(CPUE != 0) # Removing records where CPUE was 0. This will prevent NAs in SJ when there were years w/ 0 catch (OW 0 CPUE shouldn't matter)
#
# # Sacramento River
# SACSUC_RM_Sac <-
# SACSUC_RM %>%
# filter(grepl("SR", StationCode)) # Selecting stations that begin w/ SR
# head(SACSUC_RM_Sac)
# # SJ River
# SACSUC_RM_SJ <-
# SACSUC_RM %>%
# filter(grepl("SJ", StationCode)) # Selecting stations that begin w/ SJ
# head(SACSUC_RM_SJ)
# SACSUC_CDist_SAC <-
# SACSUC_RM_Sac %>%
# filter(Year %in% c(1995:2019)) %>%
# group_by(Year) %>%
# summarise(CDist_SAC = sum(RiverKilo * CPUE)/sum(CPUE))
# head(SACSUC_CDist_SAC)
# SACSUC_Month_CDist_SAC <-
# SACSUC_RM_Sac %>%
# filter(Year %in% c(1995:2019)) %>%
# group_by(Year,
# Month) %>%
# summarise(CDist_SAC = sum(RiverKilo * CPUE)/sum(CPUE))
# head(SACSUC_Month_CDist_SAC)
#
# SACSUC_CDist_SJ <-
# SACSUC_RM_SJ %>%
# filter(Year %in% c(1995:2019)) %>%
# group_by(Year) %>%
# summarise(CDist_SJ = sum(RiverKilo * CPUE)/sum(CPUE))
# head(SACSUC_CDist_SJ)
# SACSUC_CDist <-
# SACSUC_CDist_SAC %>%
# left_join(SACSUC_CDist_SJ,
# by = "Year")
# head(SACSUC_CDist)